This is an R Markdown Notebook. When you execute code within the notebook, the results appear beneath the code.
Try executing this chunk by clicking the Run button within the chunk or by placing your cursor inside it and pressing Cmd+Shift+Enter (in Windows press CTRL+Enter).
Add a new chunk by clicking the Insert Chunk button on the toolbar or by pressing Cmd+Option+I (or CTRL+Alt+I in Windows).
When you save the notebook, an HTML file containing the code and output will be saved alongside it (click the Preview button or press Cmd+Shift+K to preview the HTML file).
The preview shows you a rendered HTML copy of the contents of the editor. Consequently, unlike Knit, Preview does not run any R code chunks. Instead, the output of the chunk when it was last run in the editor is displayed.
Here i am uploading my packages (code omitted).
names(wage1)
## [1] "wage" "educ" "exper" "tenure" "nonwhite" "female"
## [7] "married" "numdep" "smsa" "northcen" "south" "west"
## [13] "construc" "ndurman" "trcommpu" "trade" "services" "profserv"
## [19] "profocc" "clerocc" "servocc" "lwage" "expersq" "tenursq"
head(wage1)
# str(nlswork)
# dplyr::glimpse(nlswork)
dplyr::glimpse(wage1$lwage)
## num [1:526] 1.13 1.18 1.1 1.79 1.67 ...
## - attr(*, "label")= chr "log(wage)"
## - attr(*, "format.stata")= chr "%9.0g"
ExpData(wage1,type=1)
ExpData(wage1,type=2)
Start discussing the statistics and graphs.
## educ
## Min. : 0.00
## 1st Qu.:12.00
## Median :12.00
## Mean :12.56
## 3rd Qu.:14.00
## Max. :18.00
## [[1]]
##
## [[2]]
##
## [[3]]
##
## [[4]]
##
## [[5]]
##
## [[6]]
##
## [[7]]
##
## [[8]]
##
## [[9]]
##
## [[10]]
##
## [[11]]
##
## [[12]]
##
## [[13]]
##
## [[14]]
##
## [[15]]
##
## [[16]]
##
## [[17]]
# vis_dat(wage1)
# vis_miss(nlswork) # ALTERNATIVE
# gg_miss_upset(wage1)
Here I am reading a Stata data file (code omitted).
names(nlswork)
## [1] "idcode" "year" "birth_yr" "age" "race" "msp"
## [7] "nev_mar" "grade" "collgrad" "not_smsa" "c_city" "south"
## [13] "ind_code" "occ_code" "union" "wks_ue" "ttl_exp" "tenure"
## [19] "hours" "wks_work" "ln_wage"
head(nlswork)
# str(nlswork)
# dplyr::glimpse(nlswork)
dplyr::glimpse(nlswork$ln_wage)
## num [1:28534] 1.45 1.03 1.59 1.78 1.78 ...
## - attr(*, "label")= chr "ln(wage/GNP deflator)"
## - attr(*, "format.stata")= chr "%9.0g"
ExpData(nlswork,type=1)
ExpData(nlswork,type=2)
Start discussing the statistics and graphs.
## grade
## Min. : 0.00
## 1st Qu.:12.00
## Median :12.00
## Mean :12.53
## 3rd Qu.:14.00
## Max. :18.00
## NA's :2
## [[1]]
##
## [[2]]
##
## [[3]]
##
## [[4]]
##
## [[5]]
##
## [[6]]
##
## [[7]]
##
## [[8]]
## $`0`
vis_dat(nlswork)
# vis_miss(nlswork) # ALTERNATIVE
gg_miss_upset(nlswork)
stargazer(nls,
title = "Summary statistics",
label = "tb:statistcis",
table.placement = "ht",
header=FALSE,type="text")
##
## Summary statistics
## =====================================================================
## Statistic N Mean St. Dev. Min Pctl(25) Pctl(75) Max
## ---------------------------------------------------------------------
## idcode 28,534 2,601.284 1,487.359 1 1,327 3,881 5,159
## year 28,534 77.959 6.384 68 72 83 88
## birth_yr 28,534 48.085 3.013 41 46 51 54
## age 28,510 29.045 6.701 14.000 23.000 34.000 46.000
## race 28,534 1.303 0.482 1 1 2 3
## msp 28,518 0.603 0.489 0.000 0.000 1.000 1.000
## nev_mar 28,518 0.230 0.421 0.000 0.000 0.000 1.000
## grade 28,532 12.533 2.324 0.000 12.000 14.000 18.000
## collgrad 28,534 0.168 0.374 0 0 0 1
## not_smsa 28,526 0.282 0.450 0.000 0.000 1.000 1.000
## c_city 28,526 0.357 0.479 0.000 0.000 1.000 1.000
## south 28,526 0.410 0.492 0.000 0.000 1.000 1.000
## ind_code 28,193 7.693 2.994 1.000 5.000 11.000 12.000
## occ_code 28,413 4.778 3.065 1.000 3.000 6.000 13.000
## union 19,238 0.234 0.424 0.000 0.000 0.000 1.000
## wks_ue 22,830 2.548 7.294 0.000 0.000 0.000 76.000
## ttl_exp 28,534 6.215 4.652 0.000 2.462 9.128 28.885
## tenure 28,101 3.124 3.751 0.000 0.500 4.167 25.917
## hours 28,467 36.560 9.870 1.000 35.000 40.000 168.000
## wks_work 27,831 53.989 29.032 0.000 36.000 72.000 104.000
## ln_wage 28,534 1.675 0.478 0.000 1.361 1.964 5.264
## ---------------------------------------------------------------------
plot(nlswork$grade,nlswork$ln_wage)
## Warning: Removed 2 rows containing missing values (geom_point).
## Saving 7 x 5 in image
## Warning: Removed 2 rows containing missing values (geom_point).
## Frequencies
## nlswork$year
## Label: interview year
## Type: Numeric
##
## Freq % Valid % Valid Cum. % Total % Total Cum.
## ----------- ------- --------- -------------- --------- --------------
## 88 2272 7.96 7.96 7.96 7.96
## 77 2171 7.61 15.57 7.61 15.57
## 87 2164 7.58 23.15 7.58 23.15
## 75 2141 7.50 30.66 7.50 30.66
## 82 2085 7.31 37.97 7.31 37.97
## 85 2085 7.31 45.27 7.31 45.27
## 83 1987 6.96 52.24 6.96 52.24
## 73 1981 6.94 59.18 6.94 59.18
## 78 1964 6.88 66.06 6.88 66.06
## 71 1851 6.49 72.55 6.49 72.55
## 80 1847 6.47 79.02 6.47 79.02
## 72 1693 5.93 84.95 5.93 84.95
## 70 1686 5.91 90.86 5.91 90.86
## 68 1375 4.82 95.68 4.82 95.68
## 69 1232 4.32 100.00 4.32 100.00
## <NA> 0 0.00 100.00
## Total 28534 100.00 100.00 100.00 100.00
## Frequencies
## nlswork$race
## Label: 1=white, 2=black, 3=other
## Type: Numeric
##
## Freq % Valid % Valid Cum. % Total % Total Cum.
## ----------- ------- --------- -------------- --------- --------------
## 1 20180 70.72 70.72 70.72 70.72
## 2 8051 28.22 98.94 28.22 98.94
## 3 303 1.06 100.00 1.06 100.00
## <NA> 0 0.00 100.00
## Total 28534 100.00 100.00 100.00 100.00
Estimating a Mincerian Wage Equation
POLS estimator with cluster-robust standard errors
##
## Regression analysis
## =================================================
## OLS panel
## linear
## OLS Pooled
## -------------------------------------------------
## Union 0.113*** 0.113***
## (0.007) (0.007)
## Collage Graduate 0.351*** 0.351***
## (0.007) (0.007)
## Age 0.022*** 0.022***
## (0.004) (0.004)
## Age sqrd. -0.0003*** -0.0003***
## (0.0001) (0.0001)
## Tenure 0.055*** 0.055***
## (0.002) (0.002)
## Tenure sqrd. -0.002*** -0.002***
## (0.0001) (0.0001)
## Not SMSA -0.205*** -0.205***
## (0.007) (0.007)
## South -0.141*** -0.141***
## (0.006) (0.006)
## City -0.032*** -0.032***
## (0.007) (0.007)
## N 19,007 19,007
## R2 0.319 0.319
## =================================================
## Notes: Standard errors in parentheses.
# ftable(c_city) # 1 if central city
pols_robust <- coeftest(pols, function(x) vcovHC(x, type = 'sss'))
stargazer(pols,pols_robust,title = "Regression analysis",
model.numbers = FALSE,
column.labels = c("Pooled","Pooled (cluster)"),
label = "regressions",
table.placement = "!ht",
notes.append = FALSE,
notes.align="l",
notes="Standard errors in parentheses.",
header = FALSE,
no.space = TRUE,
covariate.labels = c("Union","Collage graduate","Age","Age sqrd.",
"Tenure","Tenure sqrd.","Not SMSA","South","City"),
omit = c("Constant"),
omit.stat = c("adj.rsq","f","ser"),
digits = 6,
digits.extra = 7,
omit.yes.no = c("Constant",""),
dep.var.caption="",
dep.var.labels.include = FALSE,
style = "qje",
type="text")
##
## Regression analysis
## =================================================
## panel coefficient
## linear test
## Pooled Pooled (cluster)
## -------------------------------------------------
## Union 0.112774*** 0.112774***
## (0.006774) (0.011731)
## Collage graduate 0.350946*** 0.350946***
## (0.007149) (0.014112)
## Age 0.022481*** 0.022481***
## (0.004249) (0.005373)
## Age sqrd. -0.000306*** -0.000306***
## (0.000068) (0.000088)
## Tenure 0.054787*** 0.054787***
## (0.001944) (0.002743)
## Tenure sqrd. -0.001540*** -0.001540***
## (0.000125) (0.000180)
## Not SMSA -0.205457*** -0.205457***
## (0.007120) (0.013137)
## South -0.140589*** -0.140589***
## (0.005850) (0.010964)
## City -0.031543*** -0.031543***
## (0.006683) (0.011691)
## N 19,007
## R2 0.319459
## =================================================
## Notes: Standard errors in parentheses.
for a balanced panel we have
nlswork_balanced <- read_dta("data/nlswork_balanced.dta")
re_balanced <- plm(data = nlswork_balanced, ln_wage ~ union +
collgrad +age +agesq +tenure +tensq +
not_smsa +south +c_city, model="random",
index=c("idcode", "year"))
summary(re_balanced)
## Oneway (individual) effect Random Effect Model
## (Swamy-Arora's transformation)
##
## Call:
## plm(formula = ln_wage ~ union + collgrad + age + agesq + tenure +
## tensq + not_smsa + south + c_city, data = nlswork_balanced,
## model = "random", index = c("idcode", "year"))
##
## Balanced Panel: n = 53, T = 12, N = 636
##
## Effects:
## var std.dev share
## idiosyncratic 0.03698 0.19230 0.319
## individual 0.07902 0.28110 0.681
## theta: 0.8063
##
## Residuals:
## Min. 1st Qu. Median 3rd Qu. Max.
## -0.904519 -0.104813 0.015673 0.114854 0.658580
##
## Coefficients:
## Estimate Std. Error z-value Pr(>|z|)
## (Intercept) 1.05650211 0.20514289 5.1501 2.604e-07 ***
## union 0.06087668 0.02949229 2.0642 0.0390030 *
## collgrad 0.20128253 0.17651938 1.1403 0.2541673
## age 0.04592525 0.01334703 3.4409 0.0005799 ***
## agesq -0.00051693 0.00020986 -2.4632 0.0137701 *
## tenure 0.00332254 0.00616575 0.5389 0.5899766
## tensq 0.00011290 0.00028483 0.3964 0.6918193
## not_smsa -0.29590935 0.07567198 -3.9104 9.214e-05 ***
## south -0.06346941 0.06258349 -1.0142 0.3105084
## c_city 0.00068168 0.03609511 0.0189 0.9849322
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Total Sum of Squares: 32.65
## Residual Sum of Squares: 23.689
## R-Squared: 0.27447
## Adj. R-Squared: 0.26404
## Chisq: 236.819 on 9 DF, p-value: < 2.22e-16
re <- plm(data = nlswork_clean, ln_wage ~ union +
collgrad +age +agesq +tenure +tensq +
not_smsa +south +c_city, model="random",
index=c("idcode", "year"))
summary(re)
## Oneway (individual) effect Random Effect Model
## (Swamy-Arora's transformation)
##
## Call:
## plm(formula = ln_wage ~ union + collgrad + age + agesq + tenure +
## tensq + not_smsa + south + c_city, data = nlswork_clean,
## model = "random", index = c("idcode", "year"))
##
## Unbalanced Panel: n = 4134, T = 1-12, N = 19007
##
## Effects:
## var std.dev share
## idiosyncratic 0.06476 0.25448 0.444
## individual 0.08108 0.28475 0.556
## theta:
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.3336 0.5920 0.6572 0.6406 0.6987 0.7502
##
## Residuals:
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## -1.77886 -0.13081 0.00854 0.00428 0.14314 3.03289
##
## Coefficients:
## Estimate Std. Error z-value Pr(>|z|)
## (Intercept) 1.1369e+00 5.0729e-02 22.4103 < 2.2e-16 ***
## union 1.0368e-01 6.4468e-03 16.0819 < 2.2e-16 ***
## collgrad 3.6931e-01 1.2344e-02 29.9171 < 2.2e-16 ***
## age 2.3031e-02 3.3174e-03 6.9424 3.856e-12 ***
## agesq -2.4910e-04 5.3181e-05 -4.6839 2.814e-06 ***
## tenure 4.0771e-02 1.5826e-03 25.7618 < 2.2e-16 ***
## tensq -1.2466e-03 1.0022e-04 -12.4393 < 2.2e-16 ***
## not_smsa -1.5114e-01 9.3804e-03 -16.1119 < 2.2e-16 ***
## south -1.1157e-01 8.4671e-03 -13.1766 < 2.2e-16 ***
## c_city 3.9713e-04 7.4923e-03 0.0530 0.9577
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Total Sum of Squares: 1942.3
## Residual Sum of Squares: 1268
## R-Squared: 0.34903
## Adj. R-Squared: 0.34872
## Chisq: 4820.91 on 9 DF, p-value: < 2.22e-16
re_robust <- coeftest(re, function(x) vcovHC(x, type = 'sss'))
stargazer(pols,pols_robust,re,re_robust,title = "Regression analysis",
model.numbers = FALSE,
column.labels = c("Pooled","Pooled (cluster)","RE","RE (cluster"),
label = "regressions",
table.placement = "!ht",
notes.append = FALSE,
notes.align="l",
notes="Standard errors in parentheses.",
header = FALSE,
no.space = TRUE,
covariate.labels = c("Union","College Graduate","Age","Age sqrd.",
"Tenure","Tenure sqrd.","Not SMSA","South","City"),
omit = c("Constant"),
omit.stat = c("adj.rsq","f","ser"),
digits = 3,
digits.extra = 5,
omit.yes.no = c("Constant",""),
dep.var.caption="",
dep.var.labels.include = FALSE,
style = "qje",
type="text")
##
## Regression analysis
## ===================================================================
## panel coefficient panel coefficient
## linear test linear test
## Pooled Pooled (cluster) RE RE (cluster
## -------------------------------------------------------------------
## Union 0.113*** 0.113*** 0.104*** 0.104***
## (0.007) (0.012) (0.006) (0.009)
## College Graduate 0.351*** 0.351*** 0.369*** 0.369***
## (0.007) (0.014) (0.012) (0.013)
## Age 0.022*** 0.022*** 0.023*** 0.023***
## (0.004) (0.005) (0.003) (0.005)
## Age sqrd. -0.0003*** -0.0003*** -0.0002*** -0.0002***
## (0.0001) (0.0001) (0.0001) (0.0001)
## Tenure 0.055*** 0.055*** 0.041*** 0.041***
## (0.002) (0.003) (0.002) (0.002)
## Tenure sqrd. -0.002*** -0.002*** -0.001*** -0.001***
## (0.0001) (0.0002) (0.0001) (0.0001)
## Not SMSA -0.205*** -0.205*** -0.151*** -0.151***
## (0.007) (0.013) (0.009) (0.012)
## South -0.141*** -0.141*** -0.112*** -0.112***
## (0.006) (0.011) (0.008) (0.011)
## City -0.032*** -0.032*** 0.0004 0.0004
## (0.007) (0.012) (0.007) (0.010)
## N 19,007 19,007
## R2 0.319 0.349
## ===================================================================
## Notes: Standard errors in parentheses.
stargazer(pols,pols_robust,re,re_robust,title = "Regression analysis",
model.numbers = FALSE,
column.labels = c("Pooled","Pooled (cluster)","RE","RE (cluster"),
label = "regressions",
table.placement = "!ht",
notes.append = FALSE,
notes.align="l",
notes="Standard errors in parentheses.",
header = FALSE,
no.space = TRUE,
covariate.labels = c("Union","College Graduate","Age","Age sqrd.",
"Tenure","Tenure sqrd.","Not SMSA","South","City"),
omit = c("Constant"),
omit.stat = c("adj.rsq","f","ser"),
digits = 3,
digits.extra = 5,
omit.yes.no = c("Constant",""),
dep.var.caption="",
dep.var.labels.include = FALSE,
style = "qje",
type="text")
##
## Regression analysis
## ===================================================================
## panel coefficient panel coefficient
## linear test linear test
## Pooled Pooled (cluster) RE RE (cluster
## -------------------------------------------------------------------
## Union 0.113*** 0.113*** 0.104*** 0.104***
## (0.007) (0.012) (0.006) (0.009)
## College Graduate 0.351*** 0.351*** 0.369*** 0.369***
## (0.007) (0.014) (0.012) (0.013)
## Age 0.022*** 0.022*** 0.023*** 0.023***
## (0.004) (0.005) (0.003) (0.005)
## Age sqrd. -0.0003*** -0.0003*** -0.0002*** -0.0002***
## (0.0001) (0.0001) (0.0001) (0.0001)
## Tenure 0.055*** 0.055*** 0.041*** 0.041***
## (0.002) (0.003) (0.002) (0.002)
## Tenure sqrd. -0.002*** -0.002*** -0.001*** -0.001***
## (0.0001) (0.0002) (0.0001) (0.0001)
## Not SMSA -0.205*** -0.205*** -0.151*** -0.151***
## (0.007) (0.013) (0.009) (0.012)
## South -0.141*** -0.141*** -0.112*** -0.112***
## (0.006) (0.011) (0.008) (0.011)
## City -0.032*** -0.032*** 0.0004 0.0004
## (0.007) (0.012) (0.007) (0.010)
## N 19,007 19,007
## R2 0.319 0.349
## ===================================================================
## Notes: Standard errors in parentheses.
plmtest(pols, type=c("bp"))
##
## Lagrange Multiplier Test - (Breusch-Pagan) for unbalanced panels
##
## data: ln_wage ~ union + collgrad + age + agesq + tenure + tensq + not_smsa + ...
## chisq = 14041, df = 1, p-value < 2.2e-16
## alternative hypothesis: significant effects
kable(tidy(plmtest(pols, type=c("bp"))), format = "simple",caption=
"LM test for the presence of unobserved effects")
| statistic | p.value | parameter | method | alternative |
|---|---|---|---|---|
| 14041.19 | 0 | 1 | Lagrange Multiplier Test - (Breusch-Pagan) for unbalanced panels | significant effects |
# //Final slide 32
#
# *Q3*
#
fe <- plm(data = nlswork_clean, ln_wage ~ union +
collgrad +age +agesq +tenure +tensq +
not_smsa +south +c_city, model="within", index=c("idcode", "year"))
summary(fe)
## Oneway (individual) effect Within Model
##
## Call:
## plm(formula = ln_wage ~ union + collgrad + age + agesq + tenure +
## tensq + not_smsa + south + c_city, data = nlswork_clean,
## model = "within", index = c("idcode", "year"))
##
## Unbalanced Panel: n = 4134, T = 1-12, N = 19007
##
## Residuals:
## Min. 1st Qu. Median 3rd Qu. Max.
## -1.88027 -0.10216 0.00000 0.10774 2.80710
##
## Coefficients:
## Estimate Std. Error t-value Pr(>|t|)
## union 9.3877e-02 6.9662e-03 13.4761 < 2.2e-16 ***
## age 2.4259e-02 3.4467e-03 7.0383 2.031e-12 ***
## agesq -2.2618e-04 5.5316e-05 -4.0890 4.356e-05 ***
## tenure 3.2966e-02 1.6465e-03 20.0218 < 2.2e-16 ***
## tensq -1.1002e-03 1.0291e-04 -10.6916 < 2.2e-16 ***
## not_smsa -9.3105e-02 1.2970e-02 -7.1787 7.372e-13 ***
## south -6.3222e-02 1.3279e-02 -4.7611 1.944e-06 ***
## c_city 1.1409e-02 8.8964e-03 1.2824 0.1997
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Total Sum of Squares: 1119.5
## Residual Sum of Squares: 962.69
## R-Squared: 0.14005
## Adj. R-Squared: -0.099505
## F-statistic: 302.62 on 8 and 14865 DF, p-value: < 2.22e-16
stargazer(pols,re,fe,title = "Regression analysis",
model.numbers = FALSE,
column.labels = c("Pooled","RE","FE"),
label = "regressions",
table.placement = "!ht",
notes.append = FALSE,
notes.align="l",
notes="Standard errors in parentheses.",
header = FALSE,
no.space = TRUE,
covariate.labels = c("Union","College","Age","Age sqrd.","Tenure",
"Tenure sqrd.","Not SMSA","South","City"),
omit = c("Constant"),
omit.stat = c("adj.rsq","f","ser"),
digits = 6,
digits.extra = 7,
omit.yes.no = c("Constant",""),
dep.var.caption="",
dep.var.labels.include = FALSE,
style = "qje",
type="text")
##
## Regression analysis
## ===================================================
## Pooled RE FE
## Union 0.112774*** 0.103677*** 0.093877***
## (0.006774) (0.006447) (0.006966)
## College 0.350946*** 0.369309***
## (0.007149) (0.012344)
## Age 0.022481*** 0.023031*** 0.024259***
## (0.004249) (0.003317) (0.003447)
## Age sqrd. -0.000306*** -0.000249*** -0.000226***
## (0.000068) (0.000053) (0.000055)
## Tenure 0.054787*** 0.040771*** 0.032966***
## (0.001944) (0.001583) (0.001646)
## Tenure sqrd. -0.001540*** -0.001247*** -0.001100***
## (0.000125) (0.000100) (0.000103)
## Not SMSA -0.205457*** -0.151136*** -0.093105***
## (0.007120) (0.009380) (0.012970)
## South -0.140589*** -0.111567*** -0.063222***
## (0.005850) (0.008467) (0.013279)
## City -0.031543*** 0.000397 0.011409
## (0.006683) (0.007492) (0.008896)
## N 19,007 19,007 19,007
## R2 0.319459 0.349029 0.140054
## ===================================================
## Notes: Standard errors in parentheses.
# Testing for fixed effects, null: OLS better than fixed
# 'F test for individual effects' <<==>> 'F test that all u_i=0'
ols_0 <- lm(data = nlswork_clean, ln_wage ~ union +
age +agesq +tenure +tensq +
not_smsa +south +c_city)
summary(ols_0)
##
## Call:
## lm(formula = ln_wage ~ union + age + agesq + tenure + tensq +
## not_smsa + south + c_city, data = nlswork_clean)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.7497 -0.2508 -0.0182 0.2379 3.4100
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.008e+00 6.821e-02 14.776 < 2e-16 ***
## union 1.294e-01 7.181e-03 18.026 < 2e-16 ***
## age 3.929e-02 4.495e-03 8.740 < 2e-16 ***
## agesq -5.387e-04 7.175e-05 -7.509 6.24e-14 ***
## tenure 5.806e-02 2.062e-03 28.158 < 2e-16 ***
## tensq -1.699e-03 1.331e-04 -12.765 < 2e-16 ***
## not_smsa -2.311e-01 7.538e-03 -30.657 < 2e-16 ***
## south -1.475e-01 6.208e-03 -23.762 < 2e-16 ***
## c_city -3.451e-02 7.094e-03 -4.864 1.16e-06 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.4091 on 18998 degrees of freedom
## Multiple R-squared: 0.2331, Adjusted R-squared: 0.2328
## F-statistic: 721.9 on 8 and 18998 DF, p-value: < 2.2e-16
pFtest(fe, ols_0)
##
## F test for individual effects
##
## data: ln_wage ~ union + collgrad + age + agesq + tenure + tensq + not_smsa + ...
## F = 8.2833, df1 = 4133, df2 = 14865, p-value < 2.2e-16
## alternative hypothesis: significant effects
# generate fixed-effects
# nlswork_clean$specific_effects <- fixef(fe)
# *Q3.1*
fe_robust <- coeftest(fe, function(x) vcovHC(x, type = 'sss'))
stargazer(ols_0,fe,fe_robust,title = "Regression analysis",
model.numbers = FALSE,
column.labels = c("OLS","FE","FE (cluster)"),
label = "regressions",
table.placement = "!ht",
notes.append = FALSE,
notes.align="l",
notes="Standard errors in parentheses.",
header = FALSE,
no.space = TRUE,
covariate.labels = c("Union","Age","Age sqrd.","Tenure",
"Tenure sqrd.","Not SMSA","South","City"),
omit = c("Constant"),
omit.stat = c("adj.rsq","f","ser"),
digits = 6,
digits.extra = 7,
omit.yes.no = c("Constant",""),
dep.var.caption="",
dep.var.labels.include = FALSE,
style = "qje",
type="text")
##
## Regression analysis
## ===================================================
## OLS panel coefficient
## linear test
## OLS FE FE (cluster)
## ---------------------------------------------------
## Union 0.129446*** 0.093877*** 0.093877***
## (0.007181) (0.006966) (0.009565)
## Age 0.039289*** 0.024259*** 0.024259***
## (0.004495) (0.003447) (0.005008)
## Age sqrd. -0.000539*** -0.000226*** -0.000226***
## (0.000072) (0.000055) (0.000081)
## Tenure 0.058063*** 0.032966*** 0.032966***
## (0.002062) (0.001646) (0.002085)
## Tenure sqrd. -0.001699*** -0.001100*** -0.001100***
## (0.000133) (0.000103) (0.000126)
## Not SMSA -0.231095*** -0.093105*** -0.093105***
## (0.007538) (0.012970) (0.019790)
## South -0.147518*** -0.063222*** -0.063222***
## (0.006208) (0.013279) (0.021653)
## City -0.034506*** 0.011409 0.011409
## (0.007094) (0.008896) (0.012605)
## N 19,007 19,007
## R2 0.233124 0.140054
## ===================================================
## Notes: Standard errors in parentheses.
# *Q3.2*
linearHypothesis(ols,c("age=0","agesq=0"))
linearHypothesis(ols,c("age=0","agesq=0"), white.adjust = "hc1")
Wald_test(fe, vcov = "CR1", cluster = nlswork_clean$idcode,
constraints = constrain_zero(c("age","agesq")), test = "Naive-F")
# *LSDV Estimator=FE estimator* <<==>> takes too long
# *using a smaller sample*
nlswork_balanced <- read_dta("data/nlswork_balanced_small.dta")
LSDV <- lm(data = nlswork_balanced, ln_wage ~ union +
age +agesq +tenure +tensq +
not_smsa +south +c_city + factor(idcode))
summary(LSDV)
##
## Call:
## lm(formula = ln_wage ~ union + age + agesq + tenure + tensq +
## not_smsa + south + c_city + factor(idcode), data = nlswork_balanced)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.254362 -0.069457 0.004535 0.078344 0.239917
##
## Coefficients: (2 not defined because of singularities)
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.5332323 0.4915522 1.085 0.2833
## union 0.0895838 0.0548743 1.633 0.1090
## age 0.0758967 0.0334355 2.270 0.0276 *
## agesq -0.0011959 0.0005405 -2.212 0.0316 *
## tenure 0.0125105 0.0162130 0.772 0.4440
## tensq 0.0006329 0.0007176 0.882 0.3821
## not_smsa NA NA NA NA
## south NA NA NA NA
## c_city 0.1164782 0.0875330 1.331 0.1895
## factor(idcode)20 0.3287869 0.1289784 2.549 0.0140 *
## factor(idcode)126 -0.0167263 0.0546551 -0.306 0.7609
## factor(idcode)128 0.0375653 0.0882907 0.425 0.6724
## factor(idcode)379 -0.1504303 0.1188913 -1.265 0.2118
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.1149 on 49 degrees of freedom
## Multiple R-squared: 0.8021, Adjusted R-squared: 0.7617
## F-statistic: 19.86 on 10 and 49 DF, p-value: 5.49e-14
fe_LSDV <- plm(data = nlswork_balanced, ln_wage ~ union +
age +agesq +tenure +tensq +
not_smsa +south +c_city, model="within", index=c("idcode", "year"))
summary(fe_LSDV)
## Oneway (individual) effect Within Model
##
## Call:
## plm(formula = ln_wage ~ union + age + agesq + tenure + tensq +
## not_smsa + south + c_city, data = nlswork_balanced, model = "within",
## index = c("idcode", "year"))
##
## Balanced Panel: n = 5, T = 12, N = 60
##
## Residuals:
## Min. 1st Qu. Median 3rd Qu. Max.
## -0.2543619 -0.0694565 0.0045351 0.0783435 0.2399171
##
## Coefficients:
## Estimate Std. Error t-value Pr(>|t|)
## union 0.08958377 0.05487432 1.6325 0.10898
## age 0.07589667 0.03343547 2.2699 0.02765 *
## agesq -0.00119586 0.00054052 -2.2124 0.03163 *
## tenure 0.01251051 0.01621304 0.7716 0.44404
## tensq 0.00063295 0.00071759 0.8821 0.38206
## c_city 0.11647818 0.08753305 1.3307 0.18945
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Total Sum of Squares: 2.3839
## Residual Sum of Squares: 0.64685
## R-Squared: 0.72866
## Adj. R-Squared: 0.67329
## F-statistic: 21.9312 on 6 and 49 DF, p-value: 2.4403e-12
stargazer(LSDV,fe_LSDV,title = "Regression analysis",
model.numbers = FALSE,
column.labels = c("LSDV","FE"),
label = "regressions",
table.placement = "!ht",
notes.append = FALSE,
notes.align="l",
notes="Standard errors in parentheses.",
header = FALSE,
no.space = TRUE,
covariate.labels = c("Union","Age","Age sqrd.","Tenure",
"Tenure sqrd.","Not SMSA","South","City"),
omit = c("Constant"),
omit.stat = c("adj.rsq","f","ser"),
digits = 6,
digits.extra = 7,
omit.yes.no = c("Constant",""),
dep.var.caption="",
dep.var.labels.include = FALSE,
style = "qje",
type="text")
##
## Regression analysis
## ==================================================
## OLS panel
## linear
## LSDV FE
## --------------------------------------------------
## Union 0.089584 0.089584
## (0.054874) (0.054874)
## Age 0.075897** 0.075897**
## (0.033435) (0.033435)
## Age sqrd. -0.001196** -0.001196**
## (0.000541) (0.000541)
## Tenure 0.012511 0.012511
## (0.016213) (0.016213)
## Tenure sqrd. 0.000633 0.000633
## (0.000718) (0.000718)
## Not SMSA
##
## South
##
## City 0.116478 0.116478
## (0.087533) (0.087533)
## factor(idcode)20 0.328787**
## (0.128978)
## factor(idcode)126 -0.016726
## (0.054655)
## factor(idcode)128 0.037565
## (0.088291)
## factor(idcode)379 -0.150430
## (0.118891)
## N 60 60
## R2 0.802119 0.728663
## ==================================================
## Notes: Standard errors in parentheses.
# //Final slide 35
#
# *Q4*
fe_0 <- plm(data = nlswork_clean, ln_wage ~ union +
age +agesq +tenure +tensq +
not_smsa +south +c_city, model="within", index=c("idcode", "year"))
re_0 <- plm(data = nlswork_clean, ln_wage ~ union +
age +agesq +tenure +tensq +
not_smsa +south +c_city, model="random", index=c("idcode", "year"))
phtest(fe_0, re_0)
##
## Hausman Test
##
## data: ln_wage ~ union + age + agesq + tenure + tensq + not_smsa + south + ...
## chisq = 607.1, df = 8, p-value < 2.2e-16
## alternative hypothesis: one model is inconsistent
# //Final slide 46
#
# *Q5*
be <- plm(data = nlswork_clean, ln_wage ~ union +
collgrad +age +agesq +tenure +tensq +
not_smsa +south +c_city, model="between",
index=c("idcode", "year"))
summary(be)
## Oneway (individual) effect Between Model
##
## Call:
## plm(formula = ln_wage ~ union + collgrad + age + agesq + tenure +
## tensq + not_smsa + south + c_city, data = nlswork_clean,
## model = "between", index = c("idcode", "year"))
##
## Unbalanced Panel: n = 4134, T = 1-12, N = 19007
## Observations used in estimation: 4134
##
## Residuals:
## Min. 1st Qu. Median 3rd Qu. Max.
## -1.5946586 -0.2004452 -0.0067495 0.1909616 1.8487578
##
## Coefficients:
## Estimate Std. Error t-value Pr(>|t|)
## (Intercept) 1.19446539 0.15356828 7.7781 9.240e-15 ***
## union 0.11135322 0.01636297 6.8052 1.154e-11 ***
## collgrad 0.34850931 0.01308193 26.6405 < 2.2e-16 ***
## age 0.02113165 0.01022918 2.0658 0.03891 *
## agesq -0.00034316 0.00016368 -2.0965 0.03610 *
## tenure 0.09570307 0.00539210 17.7488 < 2.2e-16 ***
## tensq -0.00343998 0.00036430 -9.4426 < 2.2e-16 ***
## not_smsa -0.20536231 0.01440760 -14.2537 < 2.2e-16 ***
## south -0.13058886 0.01145804 -11.3971 < 2.2e-16 ***
## c_city -0.02215011 0.01385878 -1.5983 0.11006
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Total Sum of Squares: 773.5
## Residual Sum of Squares: 464.95
## R-Squared: 0.3989
## Adj. R-Squared: 0.39759
## F-statistic: 304.083 on 9 and 4124 DF, p-value: < 2.22e-16
# //Final slide 53
#
# *Q6*
fd <- plm(data = nlswork_clean, ln_wage ~ 0 + union +
collgrad +age +agesq +tenure +tensq +
not_smsa +south +c_city, model="fd",
index=c("idcode", "year"))
summary(fd)
## Oneway (individual) effect First-Difference Model
##
## Call:
## plm(formula = ln_wage ~ 0 + union + collgrad + age + agesq +
## tenure + tensq + not_smsa + south + c_city, data = nlswork_clean,
## model = "fd", index = c("idcode", "year"))
##
## Unbalanced Panel: n = 4134, T = 1-12, N = 19007
## Observations used in estimation: 14873
##
## Residuals:
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## -2.9266 -0.0925 0.0073 0.0156 0.1279 3.3217
##
## Coefficients:
## Estimate Std. Error t-value Pr(>|t|)
## union 6.9160e-02 6.6336e-03 10.4257 < 2.2e-16 ***
## age 2.2744e-02 5.5436e-03 4.1027 4.104e-05 ***
## agesq -2.5853e-04 9.0084e-05 -2.8698 0.004113 **
## tenure 3.2078e-02 2.1241e-03 15.1024 < 2.2e-16 ***
## tensq -1.2023e-03 1.7526e-04 -6.8600 7.160e-12 ***
## not_smsa -7.7277e-02 1.4369e-02 -5.3780 7.645e-08 ***
## south -4.6889e-02 1.5675e-02 -2.9913 0.002782 **
## c_city 2.2987e-02 9.9149e-03 2.3185 0.020437 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Total Sum of Squares: 1441.5
## Residual Sum of Squares: 1404.4
## R-Squared: 0.02879
## Adj. R-Squared: 0.028332
## F-statistic: 85.5285 on 8 and 14865 DF, p-value: < 2.22e-16
stargazer(pols,re,fe,be,title = "Regression analysis",
model.numbers = FALSE,
column.labels = c("OLS","RE","FE","BE"),
label = "regressions",
table.placement = "!ht",
notes.append = FALSE,
notes.align="l",
notes="Standard errors in parentheses.",
header = FALSE,
no.space = TRUE,
omit = c("Constant"),
omit.stat = c("adj.rsq","f","ser"),
digits = 6,
digits.extra = 7,
omit.yes.no = c("Constant",""),
dep.var.caption="",
dep.var.labels.include = FALSE,
style = "qje",
type="text")
##
## Regression analysis
## ============================================================
## OLS RE FE BE
## union 0.112774*** 0.103677*** 0.093877*** 0.111353***
## (0.006774) (0.006447) (0.006966) (0.016363)
## collgrad 0.350946*** 0.369309*** 0.348509***
## (0.007149) (0.012344) (0.013082)
## age 0.022481*** 0.023031*** 0.024259*** 0.021132**
## (0.004249) (0.003317) (0.003447) (0.010229)
## agesq -0.000306*** -0.000249*** -0.000226*** -0.000343**
## (0.000068) (0.000053) (0.000055) (0.000164)
## tenure 0.054787*** 0.040771*** 0.032966*** 0.095703***
## (0.001944) (0.001583) (0.001646) (0.005392)
## tensq -0.001540*** -0.001247*** -0.001100*** -0.003440***
## (0.000125) (0.000100) (0.000103) (0.000364)
## not_smsa -0.205457*** -0.151136*** -0.093105*** -0.205362***
## (0.007120) (0.009380) (0.012970) (0.014408)
## south -0.140589*** -0.111567*** -0.063222*** -0.130589***
## (0.005850) (0.008467) (0.013279) (0.011458)
## c_city -0.031543*** 0.000397 0.011409 -0.022150
## (0.006683) (0.007492) (0.008896) (0.013859)
## N 19,007 19,007 19,007 4,134
## R2 0.319459 0.349029 0.140054 0.398899
## ============================================================
## Notes: Standard errors in parentheses.
# H0) The null hypothesis for the Breusch-Pagan test is homoskedasticity
# takes too long to compute
# bptest(data = nlswork_clean, ln_wage ~ union +
# collgrad +age +agesq +tenure +tensq +
# not_smsa +south +c_city + factor(idcode), studentize=F)
bptest(data = nlswork_balanced, ln_wage ~ union +
collgrad +age +agesq +tenure +tensq +
not_smsa +south +c_city + factor(idcode), studentize=F)
##
## Breusch-Pagan test
##
## data: ln_wage ~ union + collgrad + age + agesq + tenure + tensq + not_smsa + south + c_city + factor(idcode)
## BP = 5.2368, df = 10, p-value = 0.8748
pwtest(data = nlswork_clean, ln_wage ~ union +
collgrad +age +agesq +tenure +tensq +
not_smsa +south +c_city)
pbsytest(data = nlswork_balanced, ln_wage ~ union +
collgrad +age +agesq +tenure +tensq +
not_smsa +south +c_city, test="j")
##
## Baltagi and Li AR-RE joint test - balanced panel
##
## data: formula
## chisq = 50.736, df = 2, p-value = 9.612e-12
## alternative hypothesis: AR(1) errors or random effects
pbgtest(fe, order = 2)
##
## Breusch-Godfrey/Wooldridge test for serial correlation in panel models
##
## data: ln_wage ~ union + collgrad + age + agesq + tenure + tensq + not_smsa + south + c_city
## chisq = 181.23, df = 2, p-value < 2.2e-16
## alternative hypothesis: serial correlation in idiosyncratic errors
pwartest(data = nlswork_balanced, ln_wage ~ union +
collgrad +age +agesq +tenure +tensq +
not_smsa +south +c_city)
##
## Wooldridge's test for serial correlation in FE panels
##
## data: plm.model
## F = 12.205, df1 = 1, df2 = 53, p-value = 0.0009707
## alternative hypothesis: serial correlation
pwfdtest(data = nlswork_balanced, ln_wage ~ union +
collgrad +age +agesq +tenure +tensq +
not_smsa +south +c_city)
##
## Wooldridge's first-difference test for serial correlation in panels
##
## data: plm.model
## F = 8.5778, df1 = 1, df2 = 48, p-value = 0.005192
## alternative hypothesis: serial correlation in differenced errors
pwfdtest(data = nlswork_balanced, ln_wage ~ union +
collgrad +age +agesq +tenure +tensq +
not_smsa +south +c_city, h0="fe")
##
## Wooldridge's first-difference test for serial correlation in panels
##
## data: plm.model
## F = 2.9964, df1 = 1, df2 = 48, p-value = 0.08988
## alternative hypothesis: serial correlation in original errors
pcdtest(data = nlswork_balanced, ln_wage ~ union +
collgrad +age +agesq +tenure +tensq +
not_smsa +south +c_city)
##
## Pesaran CD test for cross-sectional dependence in panels
##
## data: ln_wage ~ union + collgrad + age + agesq + tenure + tensq + not_smsa + south + c_city
## z = 1.8975, p-value = 0.05776
## alternative hypothesis: cross-sectional dependence
## CHECK: 'lfe' and 'fixest'
### https://github.com/sgaure/lfe
### https://github.com/lrberge/fixest
# *including 1 fixed effect*
HDFE1a <- feols(data = nlswork_clean, ln_wage ~ union +
age +agesq +tenure +tensq +
not_smsa +south +c_city | idcode)
summary(HDFE1a)
## OLS estimation, Dep. Var.: ln_wage
## Observations: 19,007
## Fixed-effects: idcode: 4,134
## Standard-errors: Clustered (idcode)
## Estimate Std. Error t value Pr(>|t|))
## union 0.093877 0.009566 9.814100 < 2.2e-16 ***
## age 0.024259 0.005008 4.843600 1.32e-06 ***
## agesq -0.000226 0.000081 -2.778400 0.005488 **
## tenure 0.032966 0.002085 15.807000 < 2.2e-16 ***
## tensq -0.001100 0.000126 -8.719200 < 2.2e-16 ***
## not_smsa -0.093105 0.019791 -4.704500 2.63e-06 ***
## south -0.063222 0.021654 -2.919600 0.003524 **
## c_city 0.011409 0.012606 0.905058 0.365487
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## RMSE: 0.225053 Adj. R2: 0.703151
## Within R2: 0.140054
HDFE1b <- felm(data = nlswork_clean, ln_wage ~ union +
age +agesq +tenure +tensq +
not_smsa +south +c_city | idcode, clustervar=c("idcode"))
## Warning: Argument(s) clustervar are deprecated and will be removed, use
## multipart formula instead
summary(HDFE1b)
##
## Call:
## felm(formula = ln_wage ~ union + age + agesq + tenure + tensq + not_smsa + south + c_city | idcode, data = nlswork_clean, clustervar = c("idcode"))
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.8803 -0.1022 0.0000 0.1077 2.8071
##
## Coefficients:
## Estimate Cluster s.e. t value Pr(>|t|)
## union 9.388e-02 9.565e-03 9.814 < 2e-16 ***
## age 2.426e-02 5.008e-03 4.844 1.32e-06 ***
## agesq -2.262e-04 8.141e-05 -2.778 0.00549 **
## tenure 3.297e-02 2.086e-03 15.807 < 2e-16 ***
## tensq -1.100e-03 1.262e-04 -8.719 < 2e-16 ***
## not_smsa -9.310e-02 1.979e-02 -4.705 2.63e-06 ***
## south -6.322e-02 2.165e-02 -2.920 0.00352 **
## c_city 1.141e-02 1.261e-02 0.905 0.36549
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.2545 on 14865 degrees of freedom
## Multiple R-squared(full model): 0.7678 Adjusted R-squared: 0.7032
## Multiple R-squared(proj model): 0.1401 Adjusted R-squared: -0.0995
## F-statistic(full model, *iid*):11.87 on 4141 and 14865 DF, p-value: < 2.2e-16
## F-statistic(proj model): 168 on 8 and 4133 DF, p-value: < 2.2e-16
# *including a 2nd fixed effect*
HDFE2a <- feols(data = nlswork_clean, ln_wage ~ union +
age +agesq +tenure +tensq +
not_smsa +south +c_city | idcode + year)
summary(HDFE2a)
## OLS estimation, Dep. Var.: ln_wage
## Observations: 19,007
## Fixed-effects: idcode: 4,134, year: 12
## Standard-errors: Clustered (idcode)
## Estimate Std. Error t value Pr(>|t|))
## union 0.095700 0.009523 10.049000 < 2.2e-16 ***
## age 0.073440 0.013588 5.404700 6.86e-08 ***
## agesq -0.000720 0.000116 -6.218800 5.51e-10 ***
## tenure 0.032423 0.002104 15.408000 < 2.2e-16 ***
## tensq -0.001090 0.000129 -8.443500 < 2.2e-16 ***
## not_smsa -0.090537 0.019619 -4.614600 4.06e-06 ***
## south -0.064281 0.021622 -2.972900 0.002967 **
## c_city 0.010432 0.012668 0.823497 0.410273
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## RMSE: 0.223942 Adj. R2: 0.705857
## Within R2: 0.066421
HDFE2b <- felm(data = nlswork_clean, ln_wage ~ union +
age +agesq +tenure +tensq +
not_smsa +south +c_city | idcode + year, clustervar=c("idcode"))
## Warning: Argument(s) clustervar are deprecated and will be removed, use
## multipart formula instead
summary(HDFE2b)
##
## Call:
## felm(formula = ln_wage ~ union + age + agesq + tenure + tensq + not_smsa + south + c_city | idcode + year, data = nlswork_clean, clustervar = c("idcode"))
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.90155 -0.09933 0.00000 0.10738 2.78536
##
## Coefficients:
## Estimate Cluster s.e. t value Pr(>|t|)
## union 0.0956999 0.0095207 10.052 < 2e-16 ***
## age 0.0734400 0.0135842 5.406 6.80e-08 ***
## agesq -0.0007205 0.0001158 -6.221 5.44e-10 ***
## tenure 0.0324225 0.0021036 15.413 < 2e-16 ***
## tensq -0.0010902 0.0001291 -8.446 < 2e-16 ***
## not_smsa -0.0905368 0.0196138 -4.616 4.03e-06 ***
## south -0.0642811 0.0216158 -2.974 0.00296 **
## c_city 0.0104319 0.0126641 0.824 0.41014
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.2533 on 14854 degrees of freedom
## Multiple R-squared(full model): 0.7701 Adjusted R-squared: 0.7059
## Multiple R-squared(proj model): 0.06642 Adjusted R-squared: -0.1945
## F-statistic(full model, *iid*):11.98 on 4152 and 14854 DF, p-value: < 2.2e-16
## F-statistic(proj model): 62.59 on 8 and 4133 DF, p-value: < 2.2e-16
See the Stata file ‘stata_do_example.do’ that produces the data in folder tmp_files.
simulated <- read_dta("data/data_simulation.dta")
# names(nlswork)
# head(nlswork)
# str(nlswork)
# eda_report(simulated,output_dir = "EDA/",output_file = "eda_simulated.pdf")
ExpData(simulated,type=1)
ExpData(simulated,type=2)
summary(simulated)
## workerid year ui quarter
## Min. : 1 Min. : 1.0 Min. :0.0004503 Min. :1.000
## 1st Qu.:124 1st Qu.: 3.0 1st Qu.:0.2438383 1st Qu.:2.000
## Median :248 Median : 5.5 Median :0.4766747 Median :2.000
## Mean :248 Mean : 5.5 Mean :0.4879205 Mean :2.517
## 3rd Qu.:372 3rd Qu.: 8.0 3rd Qu.:0.7447072 3rd Qu.:4.000
## Max. :495 Max. :10.0 Max. :0.9957856 Max. :4.000
##
## q1 wage educ exper
## Min. :0.0000 Min. : 662.1 Min. : 0.000 Min. : 0.00
## 1st Qu.:0.0000 1st Qu.:1226.8 1st Qu.: 2.322 1st Qu.: 9.00
## Median :0.0000 Median :1718.1 Median : 4.646 Median :15.00
## Mean :0.2404 Mean :1998.8 Mean : 5.226 Mean :14.34
## 3rd Qu.:0.0000 3rd Qu.:2482.9 3rd Qu.: 7.635 3rd Qu.:19.00
## Max. :1.0000 Max. :7170.2 Max. :19.446 Max. :28.00
##
## union exper2 lnwage yy1 yy2
## Min. :0.0000 Min. : 0.0 Min. :6.495 Min. :0.0 Min. :0.0
## 1st Qu.:0.0000 1st Qu.: 81.0 1st Qu.:7.112 1st Qu.:0.0 1st Qu.:0.0
## Median :0.0000 Median :225.0 Median :7.449 Median :0.0 Median :0.0
## Mean :0.4863 Mean :247.3 Mean :7.483 Mean :0.1 Mean :0.1
## 3rd Qu.:1.0000 3rd Qu.:361.0 3rd Qu.:7.817 3rd Qu.:0.0 3rd Qu.:0.0
## Max. :1.0000 Max. :784.0 Max. :8.878 Max. :1.0 Max. :1.0
##
## yy3 yy4 yy5 yy6 yy7
## Min. :0.0 Min. :0.0 Min. :0.0 Min. :0.0 Min. :0.0
## 1st Qu.:0.0 1st Qu.:0.0 1st Qu.:0.0 1st Qu.:0.0 1st Qu.:0.0
## Median :0.0 Median :0.0 Median :0.0 Median :0.0 Median :0.0
## Mean :0.1 Mean :0.1 Mean :0.1 Mean :0.1 Mean :0.1
## 3rd Qu.:0.0 3rd Qu.:0.0 3rd Qu.:0.0 3rd Qu.:0.0 3rd Qu.:0.0
## Max. :1.0 Max. :1.0 Max. :1.0 Max. :1.0 Max. :1.0
##
## yy8 yy9 yy10 lag_lnwage
## Min. :0.0 Min. :0.0 Min. :0.0 Min. :6.495
## 1st Qu.:0.0 1st Qu.:0.0 1st Qu.:0.0 1st Qu.:7.085
## Median :0.0 Median :0.0 Median :0.0 Median :7.425
## Mean :0.1 Mean :0.1 Mean :0.1 Mean :7.455
## 3rd Qu.:0.0 3rd Qu.:0.0 3rd Qu.:0.0 3rd Qu.:7.784
## Max. :1.0 Max. :1.0 Max. :1.0 Max. :8.724
## NA's :495
ftable(simulated$year)
## 1 2 3 4 5 6 7 8 9 10
##
## 495 495 495 495 495 495 495 495 495 495
ExpCTable(simulated)
ExpCatViz(simulated)
## [[1]]
##
## [[2]]
##
## [[3]]
##
## [[4]]
##
## [[5]]
##
## [[6]]
##
## [[7]]
##
## [[8]]
##
## [[9]]
##
## [[10]]
##
## [[11]]
##
## [[12]]
##
## [[13]]
##
## [[14]]
ExpNumStat(simulated,by="A",Outlier = TRUE,round=2,Qnt=c(0.1,0.25,0.50,0.99))
ExpNumViz(simulated,Page=c(6,2))
## $`0`
ExpOutliers(simulated,varlist=c("lnwage"))
## $outlier_summary
## Category lnwage
## 1 Lower cap : 0.05 6.75808242272123
## 2 Upper cap : 0.95 8.32169651894011
## 3 Lower bound 6.05
## 4 Upper bound 8.87
## 5 Num of outliers 1
## 6 Lower outlier case
## 7 Upper outlier case 210
## 8 Mean before 7.48
## 9 Mean after 7.48
## 10 Median before 7.44898780138622
## 11 Median after 7.44896911785069
##
## $outlier_data
## workerid year ui quarter q1 wage educ exper union exper2
## 1: 1 1 0.5734584 2 0 1913.481 6.3080420 17 1 289
## 2: 1 2 0.5734584 2 0 2065.687 6.3080420 18 1 324
## 3: 1 3 0.5734584 2 0 2015.642 7.3080420 19 1 361
## 4: 1 4 0.5734584 2 0 2274.630 8.3080425 20 1 400
## 5: 1 5 0.5734584 2 0 2576.639 9.3080425 21 1 441
## ---
## 4946: 495 6 0.3515452 1 1 1257.155 0.0000000 17 1 289
## 4947: 495 7 0.3515452 1 1 1170.614 0.3515451 18 1 324
## 4948: 495 8 0.3515452 1 1 1211.788 0.3515451 19 1 361
## 4949: 495 9 0.3515452 1 1 1346.442 0.3515451 20 1 400
## 4950: 495 10 0.3515452 1 1 1279.959 0.3515451 21 0 441
## lnwage yy1 yy2 yy3 yy4 yy5 yy6 yy7 yy8 yy9 yy10 lag_lnwage
## 1: 7.556680 1 0 0 0 0 0 0 0 0 0 NA
## 2: 7.633218 0 1 0 0 0 0 0 0 0 0 7.556680
## 3: 7.608693 0 0 1 0 0 0 0 0 0 0 7.633218
## 4: 7.729573 0 0 0 1 0 0 0 0 0 0 7.608693
## 5: 7.854241 0 0 0 0 1 0 0 0 0 0 7.729573
## ---
## 4946: 7.136607 0 0 0 0 0 1 0 0 0 0 6.973784
## 4947: 7.065284 0 0 0 0 0 0 1 0 0 0 7.136607
## 4948: 7.099852 0 0 0 0 0 0 0 1 0 0 7.065284
## 4949: 7.205221 0 0 0 0 0 0 0 0 1 0 7.099852
## 4950: 7.154584 0 0 0 0 0 0 0 0 0 1 7.205221
## out_cap_lnwage
## 1: 7.556680
## 2: 7.633218
## 3: 7.608693
## 4: 7.729573
## 5: 7.854241
## ---
## 4946: 7.136607
## 4947: 7.065284
## 4948: 7.099852
## 4949: 7.205221
## 4950: 7.154584
##
## $outlier_index
## $outlier_index$upper_out_index
## $outlier_index$upper_out_index[[1]]
## [1] 210
##
##
## $outlier_index$lower_out_index
## $outlier_index$lower_out_index[[1]]
## numeric(0)
vis_dat(simulated)
# gg_miss_upset(simulated)
stargazer(simulated,
title = "Summary statistics",
label = "tb:statistcis",
table.placement = "ht",
header=FALSE,type="text")
##
## Summary statistics
## ==========================================================================
## Statistic N Mean St. Dev. Min Pctl(25) Pctl(75) Max
## --------------------------------------------------------------------------
## workerid 4,950 248.000 142.908 1 124 372 495
## year 4,950 5.500 2.873 1 3 8 10
## ui 4,950 0.488 0.289 0.0005 0.244 0.745 0.996
## quarter 4,950 2.517 1.128 1 2 4 4
## q1 4,950 0.240 0.427 0 0 0 1
## wage 4,950 1,998.779 1,032.194 662.083 1,226.806 2,482.937 7,170.242
## educ 4,950 5.226 3.795 0.000 2.322 7.635 19.446
## exper 4,950 14.336 6.460 0 9 19 28
## union 4,950 0.486 0.500 0 0 1 1
## exper2 4,950 247.252 187.126 0 81 361 784
## lnwage 4,950 7.483 0.476 6.495 7.112 7.817 8.878
## yy1 4,950 0.100 0.300 0 0 0 1
## yy2 4,950 0.100 0.300 0 0 0 1
## yy3 4,950 0.100 0.300 0 0 0 1
## yy4 4,950 0.100 0.300 0 0 0 1
## yy5 4,950 0.100 0.300 0 0 0 1
## yy6 4,950 0.100 0.300 0 0 0 1
## yy7 4,950 0.100 0.300 0 0 0 1
## yy8 4,950 0.100 0.300 0 0 0 1
## yy9 4,950 0.100 0.300 0 0 0 1
## yy10 4,950 0.100 0.300 0 0 0 1
## lag_lnwage 4,455 7.455 0.470 6.495 7.085 7.784 8.724
## --------------------------------------------------------------------------
## DASHBOARD
####### ExPanD()
OBSERVE THE MISTAKE FOLLOWING THE INTRODUCTION OF TIME DUMMIES AND EXPERIENCE IN THE FIXED-EFFECTS MODEL
pols <- plm(data = simulated, lnwage ~ educ + exper +
exper2 + factor(year),
model="pooling", index=c("workerid", "year"))
re <- plm(data = simulated, lnwage ~ educ + exper +
exper2 + factor(year),
model="random", index=c("workerid", "year"))
plmtest(pols, type=c("bp"))
##
## Lagrange Multiplier Test - (Breusch-Pagan) for balanced panels
##
## data: lnwage ~ educ + exper + exper2 + factor(year)
## chisq = 19885, df = 1, p-value < 2.2e-16
## alternative hypothesis: significant effects
fe <- plm(data = simulated, lnwage ~ educ + exper +
exper2 + factor(year),
model="within", index=c("workerid", "year"))
pFtest(fe, pols)
##
## F test for individual effects
##
## data: lnwage ~ educ + exper + exper2 + factor(year)
## F = 270.97, df1 = 493, df2 = 4444, p-value < 2.2e-16
## alternative hypothesis: significant effects
phtest(fe, re)
##
## Hausman Test
##
## data: lnwage ~ educ + exper + exper2 + factor(year)
## chisq = 309.92, df = 11, p-value < 2.2e-16
## alternative hypothesis: one model is inconsistent
pols_robust <- coeftest(pols, function(x) vcovHC(x, type = 'sss'))
re_robust <- coeftest(re, function(x) vcovHC(x, type = 'sss'))
fe_robust <- coeftest(fe, function(x) vcovHC(x, type = 'sss'))
stargazer(pols_robust,re_robust, fe_robust,title = "Regression analysis",
model.numbers = FALSE,
column.labels = c("Pooled (cluster)","RE (cluster)","FE (cluster"),
label = "regressions",
table.placement = "!ht",
notes.append = FALSE,
notes.align="l",
notes="Standard errors in parentheses.",
header = FALSE,
no.space = TRUE,
omit = c("Constant"),
omit.stat = c("adj.rsq","f","ser"),
digits = 3,
digits.extra = 5,
omit.yes.no = c("Constant",""),
dep.var.caption="",
dep.var.labels.include = FALSE,
style = "qje",
type="text")
##
## Regression analysis
## ========================================================
## Pooled (cluster) RE (cluster) FE (cluster
## educ 0.107*** 0.067*** 0.062***
## (0.002) (0.001) (0.001)
## exper 0.013*** 0.005** 0.025***
## (0.005) (0.002) (0.001)
## exper2 -0.0003* 0.000004 0.000004
## (0.0002) (0.00002) (0.00002)
## factor(year)2 -0.001 0.019*** 0.0002
## (0.004) (0.004) (0.003)
## factor(year)3 -0.002 0.038*** 0.001
## (0.005) (0.005) (0.003)
## factor(year)4 -0.008 0.052*** -0.004*
## (0.007) (0.007) (0.003)
## factor(year)5 -0.004 0.075*** -0.0002
## (0.009) (0.009) (0.002)
## factor(year)6 -0.005 0.095*** 0.001
## (0.010) (0.011) (0.003)
## factor(year)7 -0.009 0.112*** -0.001
## (0.012) (0.013) (0.003)
## factor(year)8 -0.011 0.131*** 0.00004
## (0.014) (0.015) (0.003)
## factor(year)9 -0.013 0.148*** -0.002
## (0.016) (0.017) (0.003)
## factor(year)10 -0.011 0.169***
## (0.018) (0.019)
## ========================================================
## Notes: Standard errors in parentheses.
end_time <- Sys.time()
end_time - start_time
## Time difference of 29.16753 secs
# sprintf(end_time - start_time,fmt = '%#.1f')
#